# -*- coding: utf-8 -*-
import json
import random
from datetime import datetime

import scrapy
from scrapy import Request
from scrapy.utils.project import get_project_settings

from gd_collection.rules import *
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.spu_dao import SpuDao
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full-catalog spider.

    For every SPU recorded in the current batch, POSTs to the dealer-info
    endpoint page by page and yields the parsed item data. Skips SPUs that
    are over the offline-time threshold or already collected in this batch.
    """

    name = 'full'
    # Dealer-info (quote price) endpoint
    item_url = 'https://gdgpo.czt.gd.gov.cn/gpmall-goodslibrary-interface/goodsQuotePrice/goodsDealersinfo'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Create the batch record and the duplicate filter for this run.

        :param batchNo: optional batch number; BaseSpider derives
            ``self.batch_no`` from it.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record
        BatchDao().create_batch(self.batch_no)
        # Avoid re-collecting SPUs already done in this batch
        self.done_filter = DoneFilter(self.batch_no, filter_key='spuId', fields={'_id': 1, 'spuId': 1})
        # Page size for the dealer-quote listing endpoint
        self.size = 10

    def _build_list_req(self, spu_id, page, batch_no):
        """Build the POST request for one page of dealer quotes of a SPU.

        :param spu_id: SPU identifier (``goodspriceguid`` in the payload)
        :param page: 1-based page number to fetch
        :param batch_no: batch number carried through ``meta`` to the parser
        :return: a scrapy ``Request`` handled by :meth:`parse_item_data`
        """
        payload = {
            "goodspriceguid": spu_id,
            "suppliername": "",
            "pageno": page,
            "pagesize": self.size,
            "sortby": "goodsquotepricemoney",
            "order": "asc",
        }
        return Request(
            method='POST',
            url=self.item_url,
            meta={
                'reqType': 'item',
                '_id': spu_id,
                'page': page,
                'batchNo': batch_no
            },
            body=json.dumps(payload),
            headers={
                'Content-Type': 'application/json'
            },
            callback=self.parse_item_data,
            errback=self.error_back,
            dont_filter=True,
            priority=260
        )

    def start_requests(self):
        """Yield the page-1 request for every eligible SPU of the batch."""
        settings = get_project_settings()
        # Optional catalog white list restricting which SPUs are crawled
        white_list = settings.get("CATALOG_WHITE_LIST")
        if white_list:
            pool_list = SpuDao().get_batch_spu_list(self.batch_no,
                                                    fields={'_id': 1, 'spuId': 1, 'batchNo': 1},
                                                    query={"$or": white_list})
        else:
            pool_list = SpuDao().get_batch_spu_list(self.batch_no, fields={'offlineTime': 0})
        self.logger.info('全量：%s', len(pool_list))
        # Randomize order so parallel/retried runs don't hit SPUs in the same sequence
        random.shuffle(pool_list)
        # Hoisted out of the loop: settings are invariant for the whole run
        max_offline = settings.get('MAX_OFFLINE_TIME', 2)
        force_recover = settings.get('FORCE_RECOVER', False)
        for spu in pool_list:
            spu_id = spu.get('_id')
            batch_no = spu.get('batchNo')
            # NOTE(review): neither projection above returns 'offlineTime'
            # (the else-branch *excludes* it), so this default of 0 may always
            # apply and the threshold below may never trigger — verify the DAO.
            offline_time = spu.get('offlineTime', 0)
            if offline_time > max_offline:
                self.logger.info('忽略: [%s][%s]', spu_id, offline_time)
                continue
            # Skip SPUs already collected, unless a forced re-crawl is requested
            if self.done_filter.contains(spu_id) and not force_recover:
                self.logger.info('已采: [%s]', spu_id)
                continue
            # Collect this SPU, starting from page 1
            yield self._build_list_req(spu_id, 1, batch_no)

    def parse_item_data(self, response):
        """Parse one result page; on page 1, also fan out the remaining pages.

        :param response: response of a request built by :meth:`_build_list_req`
        :yields: parsed item dicts, then follow-up page requests (page 1 only)
        """
        meta = response.meta
        batch_no = meta.get('batchNo')
        _id = meta.get('_id')
        current_page = meta.get('page')
        # The bare call resolves to the module-level parse_item_data imported
        # via `from gd_collection.rules import *`, not this method.
        datas = parse_item_data(response)
        if datas:
            for data in datas:
                self.logger.info('商品: [%s]', data.get('skuId'))
                yield data

        if current_page == 1:
            totals = parse_full_total_page(response)
            for page in range(2, totals + 1):
                yield self._build_list_req(_id, page, batch_no)
